Introduction

This note project folder demonstrate the basics of spatial data analysis with R.

Required Package

library(tidyverse)
library(ggthemes)
theme_set(theme_map())

library(sf)
sf_use_s2(FALSE)
library(rnaturalearth)
library(rnaturalearthdata)

Load the World Map

Introducing a new format of geo-referenced data.

world = ne_countries(scale = "medium", type = "map_units", returnclass = "sf")

# world_2 = ne_countries(scale = "medium", type = "map_units", returnclass = "sf")
# View(world_2 |> select(geometry))
world |> ggplot() + geom_sf()

names(world)
##  [1] "scalerank"  "featurecla" "labelrank"  "sovereignt" "sov_a3"    
##  [6] "adm0_dif"   "level"      "type"       "admin"      "adm0_a3"   
## [11] "geou_dif"   "geounit"    "gu_a3"      "su_dif"     "subunit"   
## [16] "su_a3"      "brk_diff"   "name"       "name_long"  "brk_a3"    
## [21] "brk_name"   "brk_group"  "abbrev"     "postal"     "formal_en" 
## [26] "formal_fr"  "note_adm0"  "note_brk"   "name_sort"  "name_alt"  
## [31] "mapcolor7"  "mapcolor8"  "mapcolor9"  "mapcolor13" "pop_est"   
## [36] "gdp_md_est" "pop_year"   "lastcensus" "gdp_year"   "economy"   
## [41] "income_grp" "wikipedia"  "fips_10"    "iso_a2"     "iso_a3"    
## [46] "iso_n3"     "un_a3"      "wb_a2"      "wb_a3"      "woe_id"    
## [51] "adm0_a3_is" "adm0_a3_us" "adm0_a3_un" "adm0_a3_wb" "continent" 
## [56] "region_un"  "subregion"  "region_wb"  "name_len"   "long_len"  
## [61] "abbrev_len" "tiny"       "homepart"   "geometry"

Load the Conflict Data

d = read_csv("Lec_09/data/GEDEvent_v22_1.csv")

Get Point-Referenced Conflict Data in 2021

d_event_2021 = d |> filter(year == 2021)

Point Point-Referenced Conflict Data

ggplot() + 
  geom_sf(data = world) +
  geom_point(data = d_event_2021, 
             aes(x = longitude, y = latitude),
             alpha = 0.2) 

Look at only conflicts in Africa.

ggplot() + 
  geom_sf(data = world |> filter(region_un == "Africa")) +
  geom_point(data = d_event_2021 |> filter(region == "Africa"), 
             aes(x = longitude, y = latitude),
             alpha = 0.2) 

Get Areal Conflict Data in 2021

d_country_2021 = d |>
  filter(year == 2021) |>
  group_by(country_id, country, region) |>
  summarise(
    n_conflict = n()
  )

d_country_2021 = d_country_2021 |> arrange(-n_conflict)

Merge Map with Data

world_m = world |>
  left_join(d_country_2021, by = c("sovereignt" = "country"))

world_m |>
  select(sovereignt, n_conflict)
## Simple feature collection with 264 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961
## CRS:           +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## First 10 features:
##              sovereignt n_conflict                       geometry
## 1           Netherlands         NA MULTIPOLYGON (((-69.89912 1...
## 2   Antigua and Barbuda         NA MULTIPOLYGON (((-61.71606 1...
## 3   Antigua and Barbuda         NA MULTIPOLYGON (((-61.74712 1...
## 4           Afghanistan       4510 MULTIPOLYGON (((74.89131 37...
## 5                Angola          4 MULTIPOLYGON (((14.19082 -5...
## 6        United Kingdom         NA MULTIPOLYGON (((-63.00122 1...
## 7               Albania         NA MULTIPOLYGON (((20.06396 42...
## 8               Finland         NA MULTIPOLYGON (((20.61133 60...
## 9               Andorra         NA MULTIPOLYGON (((1.706055 42...
## 10 United Arab Emirates         NA MULTIPOLYGON (((53.92783 24...
summary(world_m$n_conflict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     1.0     4.0    42.0   257.4   172.2  4510.0     208

Plot Country-Level Conflict Data

ggplot() + 
  geom_sf(data = world_m, aes(fill = n_conflict)) +
  scale_fill_viridis_c(option = "B", direction = -1, trans = "log")

Extension: Cartogram

Reproject the map to change the sizes regions according to a variable of interest.

Read this Wikipedia page for a detailed introduction of Cartogram: https://en.wikipedia.org/wiki/Cartogram

Case: Draw Cartograms of conflicts in Africa

Personally, this is the most interesting type of map.

# install.packages("cartogram")
library(cartogram)
# Cartogram by the number of conflict events in Africa
world_m_africa = world_m |>
  filter(region == "Africa") |>
  mutate(geometry = st_transform(geometry, 3857)) # Specify a projection of the map. Essential

# NEW: Fill in NA n_conflict
summary(world_m_africa$n_conflict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     5.5    52.0   122.7   193.0   627.0
# Re-project the map using the cartogram package
world_m_africa_cart = world_m_africa |>
  cartogram_cont(weight = "n_conflict")

# Plot a choropleth map
ggplot(data = world_m_africa) +
  geom_sf(aes(fill = n_conflict)) +
  geom_sf_label(aes(label = sovereignt)) +
  scale_fill_viridis_c(option = "B", direction = -1, trans = "log")

# Plot a cartogram
ggplot(data = world_m_africa_cart) +
  geom_sf() +
  geom_sf_label(aes(label = sovereignt))

# Diagrammic (Dorling) catograms -- some further extraction
world_m_africa_cart_dorling = world_m_africa |>
  cartogram_dorling(weight = "n_conflict")

ggplot(data = world_m_africa_cart_dorling) +
  geom_sf() +
  geom_sf_text(aes(label = sovereignt))

Read more: https://r-charts.com/spatial/cartogram-ggplot2/

Matching Countries’ Names Smartly

In cross-country analysis, what brings the most trouble is the matching of country identifiers. One thing that goes unnoticed in the above steps is that some contries “fell out” of the study siliently. We have a handy tool to help match countries from different datasets correctly.

# Identify the problem. Which countries fell out of the study because their identifies do not match?
# Use the anti_join function
d_country_2021 |>
  anti_join(world, c("country" = "sovereignt"))
## # A tibble: 6 × 4
## # Groups:   country_id, country [6]
##   country_id country                         region      n_conflict
##        <dbl> <chr>                           <chr>            <int>
## 1        775 Myanmar (Burma)                 Asia               848
## 2        490 DR Congo (Zaire)                Africa             781
## 3        678 Yemen (North Yemen)             Middle East        764
## 4        572 Kingdom of eSwatini (Swaziland) Africa              10
## 5        510 Tanzania                        Africa               8
## 6        365 Russia (Soviet Union)           Europe               2
# Keep the entries that are NOT matched
# install.packages("countrycode")
library(countrycode)
d_country_2021_t = d_country_2021 |>
  mutate(
    iso3c = countrycode(country, "country.name", "iso3c")
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `iso3c = countrycode(country, "country.name", "iso3c")`.
## ℹ In group 48: `country_id = 678`, `country = "Yemen (North Yemen)"`.
## Caused by warning in `countrycode_convert()`:
## ! Some values were not matched unambiguously: Yemen (North Yemen)
# Check: There is a warning --> Some country names fail to be parsed.
d_country_2021_t |>
  select(country, iso3c) |>
  filter(is.na(iso3c))
## # A tibble: 1 × 3
## # Groups:   country_id, country [1]
##   country_id country             iso3c
##        <dbl> <chr>               <chr>
## 1        678 Yemen (North Yemen) <NA>
# Remove the (North Yemen) part and redo it
d_country_2021_t = d_country_2021 |>
  mutate(
    country = recode(country, "Yemen (North Yemen)" = "Yemen")
  ) |>
  mutate(
    iso3c = countrycode(country, "country.name", "iso3c")
  )
# Re-do the steps that join the world map and the data. This time, using ISO3C as the country identifier.

world_m = world |>
  left_join(d_country_2021_t, by = c("iso_a3" = "iso3c"))

# Join continent's names using countrycode
world_m = world_m |>
  mutate(
    continent = countrycode(iso_a3, "iso3c", "continent")
  )
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `continent = countrycode(iso_a3, "iso3c", "continent")`.
## Caused by warning in `countrycode_convert()`:
## ! Some values were not matched unambiguously: ATA, ATF, CCK, HMD, IOT, SGS
# Cartogram by the number of conflict events in Africa
world_m_africa = world_m |>
  filter(continent == "Africa") |>
  mutate(geometry = st_transform(geometry, 3857))

summary(world_m_africa$n_conflict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    6.75   52.00  135.96  193.00  781.00      30
# # Impute ZEROS
# world_m_africa = world_m_africa |>
#   mutate(n_conflict = replace_na(n_conflict, 1))

# summary(world_m_africa$n_conflict)

# Re-project the map using the cartogram package
world_m_africa_cart = world_m_africa |>
  cartogram_cont(weight = "n_conflict")
## Warning in cartogram_cont.sf(world_m_africa, weight = "n_conflict"): NA not
## allowed in weight vector. Features will be removed from Shape.
# Plot a choropleth map
ggplot(data = world_m_africa) +
  geom_sf(aes(fill = n_conflict)) +
  geom_sf_label(aes(label = sovereignt)) +
  scale_fill_viridis_c(option = "B", direction = -1, trans = "log")

# Plot a cartogram
ggplot(data = world_m_africa_cart) +
  geom_sf() +
  geom_sf_label(aes(label = iso_a3))

# Diagrammic (Dorling) catograms
world_m_africa_cart_dorling = world_m_africa |>
  filter(!is.na(n_conflict)) |> 
  # Note: The cartogram_dorling function will report error if NA values exist.
  cartogram_dorling(weight = "n_conflict")

ggplot(data = world_m_africa_cart_dorling) +
  geom_sf() +
  geom_sf_text(aes(label = sovereignt))